Data Packages
Baseine pacakges for our analysis
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.3 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.3 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.2 ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(here)
here() starts at U:/DS241/bikeshare_23
library(openmeteo)
Loading the Bikeshare Raw Data
df1 <- read_csv("data_raw/202309-capitalbikeshare-tripdata.csv", show_col_types = FALSE) %>% clean_names()
Creating Dataframe 2 - Interjoing
df2s=df1 %>%
select(rideable_type,member_casual,
contains("start"),ride_id)%>%
mutate(start_stop="start")%>%
rename(t=started_at,
station_name=start_station_name,
station_id=start_station_id,
lat=start_lat,
lng=start_lng)
df2e=df1 %>%
select(ride_id,rideable_type,member_casual,
contains("end")) %>%
mutate(start_stop="stop") %>%
rename(t=ended_at,
station_name=end_station_name,
station_id=end_station_id,
lat=end_lat,
lng=end_lng)
df2=bind_rows(df2s,df2e) %>%
arrange(t) %>%
mutate(rider_delta=(start_stop=="start")*2-1) %>% #change in ridership
mutate(riders=cumsum(rider_delta)) %>%
relocate(riders,.after=t)
Plotting the Dataframe
df2 %>%
ggplot(aes(t,riders)) +
geom_line()

Creating a subsampled dataset
df_s=df2 %>% slice_head(n=1000)
Why looking at slicing to every one hundredth data point will be bad?
- Will likely track features however will jump data points whether the
data is sparse or not. Want the data to be equally spaced in time.
Starting with 100 rows to get the calculation right and then see if
oyu can do it with a bigger data set. Data stored in number of seconds
based on a reference point
Round down to the nearest 10 mins, relocate t_f next to t, and filter
the data points by t_f
df_e=df_s |>
mutate(t_f=floor_date(t, "10 mins")) %>%
relocate(t_f,.after=t) %>%
slice_head(n=1,by=t_f)
Applying to Previous Manipulations to the Entire Data Set
df_r=df2 |>
mutate(t_f=floor_date(t,"10 mins")) %>%
relocate(t_f,.after=t) %>%
slice_head(n=1,by=t_f)
Creating the Associated Plot
p1=df2 %>%
filter(day(t)==18) %>%
ggplot(aes(x=t, y=riders)) +
geom_line() +
ggtitle("Riders on 18Sep")
p1+
geom_line(data=df_r %>% filter(day(t)==18),
color="red")

df_r=df2 |>
mutate(t_f=floor_date(t,"1 mins")) %>%
relocate(t_f,.after=t) %>%
slice_head(n=1,by=t_f)
p1=df2 %>%
filter(day(t)==18) %>%
ggplot(aes(x=t, y=riders)) +
geom_line() +
ggtitle("Riders on 18Sep")
p1+
geom_line(data=df_r %>% filter(day(t)==18),
color="red")

% Get Weather Data
df_w=weather_history("Washington",
start = "2023-09-01",
end = "2023-09-30",
hourly = c("apparent_temperature",
"wind_speed_10m",
"precipitation")
)
`geocode()` has matched "Washington" to:
Washington in Washington, D.C., United States
Population: 601723
Co-ordinates: c(38.89511, -77.03637)
Merging bike and weather data
df_s=df2 %>% slice_sample(n=1000)
df_j=df_s %>% left_join(df_w,
by=join_by(closest(t>=datetime)))
df_j=df_s %>%
left_join(df_w,by=join_by(closest(t>=datetime))) %>%
relocate(datetime, .after=t)
head(df_j)
Investigating the Time Zone Mismatch Between the Bikeshare and
Weather Datasets
df_j$t[1:5]
[1] "2023-09-05 07:07:28 UTC" "2023-09-22 11:00:32 UTC" "2023-09-14 10:05:05 UTC" "2023-09-09 09:17:47 UTC"
[5] "2023-09-08 15:10:02 UTC"
df_j$datetime[1:5]
[1] "2023-09-05 03:00:00 EDT" "2023-09-22 07:00:00 EDT" "2023-09-14 06:00:00 EDT" "2023-09-09 05:00:00 EDT"
[5] "2023-09-08 11:00:00 EDT"
Sys.timezone()
[1] "America/New_York"
df_s_est <- force_tz(df_s, tzone = "America/New_York")
df_j_est=df_s_est %>%
left_join(df_w,by=join_by(closest(t>=datetime))) %>%
relocate(datetime, .after=t)
head(df_j_est)
df_j_est$datetime[1:5]
[1] "2023-09-05 07:00:00 EDT" "2023-09-22 11:00:00 EDT" "2023-09-14 10:00:00 EDT" "2023-09-09 09:00:00 EDT"
[5] "2023-09-08 15:00:00 EDT"
df2$t[1:5]
[1] "2023-09-01 00:00:44 UTC" "2023-09-01 00:01:48 UTC" "2023-09-01 00:01:48 UTC" "2023-09-01 00:02:03 UTC"
[5] "2023-09-01 00:02:21 UTC"
force_tz(df2$t[1:5],"America/New_York")
[1] "2023-09-01 00:00:44 EDT" "2023-09-01 00:01:48 EDT" "2023-09-01 00:01:48 EDT" "2023-09-01 00:02:03 EDT"
[5] "2023-09-01 00:02:21 EDT"
df2c=df2 %>% mutate(t=force_tz(t,tzone="America/New_York")) #corrected
df_s2=df2c %>% slice_sample(n=1000)
df_j2=df_s2 %>%
left_join(df_w,by=join_by(closest(t>=datetime))) %>%
relocate(datetime, .after=t)
head(df_j2)
NA
NA
dfc=df2c %>%
left_join(df_w,by=join_by(closest(t>=datetime))) %>%
relocate(datetime, .after=t) %>%
rename(atemp=hourly_apparent_temperature,
wind=hourly_wind_speed_10m,
prec=hourly_precipitation)
A Visualization of the Data
Coloring when the rain is greater than 1- color each point based on
whether it was raining or not
p2=dfc %>%
ggplot(aes(x=t,y=riders,color=prec>1)) +
geom_point()
p2

NA
p3 = dfc %>%
filter(day(t)==23) %>%
ggplot(aes(t,riders,color=wind))+
geom_point() +
ggtitle("Riders vs. Precipitation on Sep 10")
p3

plotly::ggplotly(p3)
LS0tDQp0aXRsZTogIkVYUEVSSU1FTlQgNF9iaWtlc2hhcmUgZGF0YSAiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIERhdGEgUGFja2FnZXMgDQoNCkJhc2VpbmUgcGFjYWtnZXMgZm9yIG91ciBhbmFseXNpcw0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShoZXJlKQ0KbGlicmFyeShvcGVubWV0ZW8pDQpgYGANCg0KDQojIExvYWRpbmcgdGhlIEJpa2VzaGFyZSBSYXcgRGF0YQ0KYGBge3J9DQpkZjEgPC0gcmVhZF9jc3YoImRhdGFfcmF3LzIwMjMwOS1jYXBpdGFsYmlrZXNoYXJlLXRyaXBkYXRhLmNzdiIsIHNob3dfY29sX3R5cGVzID0gRkFMU0UpICU+JSBjbGVhbl9uYW1lcygpDQpgYGANCg0KIyBDcmVhdGluZyBEYXRhZnJhbWUgMiAtIEludGVyam9pbmcgDQpgYGB7cn0NCmRmMnM9ZGYxICU+JSANCiAgc2VsZWN0KHJpZGVhYmxlX3R5cGUsbWVtYmVyX2Nhc3VhbCwNCiAgICAgICAgICAgICAgICAJY29udGFpbnMoInN0YXJ0IikscmlkZV9pZCklPiUgDQogIG11dGF0ZShzdGFydF9zdG9wPSJzdGFydCIpJT4lDQogIHJlbmFtZSh0PXN0YXJ0ZWRfYXQsDQogICAgIAlzdGF0aW9uX25hbWU9c3RhcnRfc3RhdGlvbl9uYW1lLA0KICAgICAJc3RhdGlvbl9pZD1zdGFydF9zdGF0aW9uX2lkLA0KICAgICAJbGF0PXN0YXJ0X2xhdCwNCiAgICAgCWxuZz1zdGFydF9sbmcpDQoNCmRmMmU9ZGYxICU+JQ0Kc2VsZWN0KHJpZGVfaWQscmlkZWFibGVfdHlwZSxtZW1iZXJfY2FzdWFsLA0KICAgICAgICAgICAgICAgIAljb250YWlucygiZW5kIikpICU+JQ0KICBtdXRhdGUoc3RhcnRfc3RvcD0ic3RvcCIpICU+JQ0KICByZW5hbWUodD1lbmRlZF9hdCwNCiAgICAgCXN0YXRpb25fbmFtZT1lbmRfc3RhdGlvbl9uYW1lLA0KICAgICAJc3RhdGlvbl9pZD1lbmRfc3RhdGlvbl9pZCwNCiAgICAgCWxhdD1lbmRfbGF0LA0KICAgICAJbG5nPWVuZF9sbmcpDQoNCmRmMj1iaW5kX3Jvd3MoZGYycyxkZjJlKSAlPiUNCiAgYXJyYW5nZSh0KSAlPiUNCiAgbXV0YXRlKHJpZGVyX2RlbHRhPShzdGFydF9zdG9wPT0ic3RhcnQiKSoyLTEpICU+JSAjY2hhbmdlIGluIHJpZGVyc2hpcCANCiAgbXV0YXRlKHJpZGVycz1jdW1zdW0ocmlkZXJfZGVsdGEpKSAlPiUNCiAgcmVsb2NhdGUocmlkZXJzLC5hZnRlcj10KQ0KDQpgYGANCg0KIyBQbG90dGluZyB0aGUgRGF0YWZyYW1lDQoNCmBgYHtyfQ0KZGYyICU+JSANCiAgZ2dwbG90KGFlcyh0LHJpZGVycykpICsNCiAgZ2VvbV9saW5lKCkNCmBgYA0KIyBDcmVhdGluZyBhIHN1YnNhbXBsZWQgZGF0YXNldCANCg0KYGBge3J9DQpkZl9zPWRmMiAlPiUgc2xpY2VfaGVhZChuPTEwMDApDQoNCmBgYA0KDQpXaHkgbG9va2luZyBhdCBzbGljaW5nIHRvIGV2ZXJ5IG9uZSBodW5kcmVkdGggZGF0YSBwb2ludCB3aWxsIGJlIGJhZD8gLSBXaWxsIGxpa2VseSB0cmFjayBmZWF0dXJlcyBob3dldmVyIHdpbGwganVtcCBkYXRhIHBvaW50cyB3aGV0aGVyIHRoZSBkYXRhIGlzIHNwYXJzZSBvciBub3QuIFdhbnQgdGhlIGRhdGEgdG8gYmUgZXF1YWxseSBzcGFjZWQgaW4gdGltZS4NCg0KU3RhcnRpbmcgd2l0aCAxMDAgcm93cyB0byBnZXQgdGhlIGNhbGN1bGF0aW9uIHJpZ2h0IGFuZCB0aGVuIHNlZSBpZiBveXUgY2FuIGRvIGl0IHdpdGggYSBiaWdnZXIgZGF0YSBzZXQuIA0KRGF0YSBzdG9yZWQgaW4gbnVtYmVyIG9mIHNlY29uZHMgYmFzZWQgb24gYSByZWZlcmVuY2UgcG9pbnQNCg0KDQpSb3VuZCBkb3duIHRvIHRoZSBuZWFyZXN0IDEwIG1pbnMsIHJlbG9jYXRlIHRfZiBuZXh0IHRvIHQsIGFuZCBmaWx0ZXIgdGhlIGRhdGEgcG9pbnRzIGJ5IHRfZg0KDQpgYGB7cn0NCmRmX2U9ZGZfcyB8Pg0KICBtdXRhdGUodF9mPWZsb29yX2RhdGUodCwgIjEwIG1pbnMiKSkgJT4lDQogICByZWxvY2F0ZSh0X2YsLmFmdGVyPXQpICU+JQ0KICBzbGljZV9oZWFkKG49MSxieT10X2YpDQpgYGANCg0KDQojIEFwcGx5aW5nIHRvIFByZXZpb3VzIE1hbmlwdWxhdGlvbnMgdG8gdGhlIEVudGlyZSBEYXRhIFNldCANCmBgYHtyfQ0KZGZfcj1kZjIgfD4NCiAgbXV0YXRlKHRfZj1mbG9vcl9kYXRlKHQsIjEwIG1pbnMiKSkgJT4lDQogIHJlbG9jYXRlKHRfZiwuYWZ0ZXI9dCkgJT4lDQogIHNsaWNlX2hlYWQobj0xLGJ5PXRfZikNCmBgYA0KDQogDQojIENyZWF0aW5nIHRoZSBBc3NvY2lhdGVkIFBsb3QgDQpgYGB7cn0NCg0KcDE9ZGYyICU+JSANCiAgZmlsdGVyKGRheSh0KT09MTgpICU+JQ0KICBnZ3Bsb3QoYWVzKHg9dCwgeT1yaWRlcnMpKSArDQogIGdlb21fbGluZSgpICsNCiAgZ2d0aXRsZSgiUmlkZXJzIG9uIDE4U2VwIikNCg0KcDErDQogIGdlb21fbGluZShkYXRhPWRmX3IgJT4lIGZpbHRlcihkYXkodCk9PTE4KSwNCiAgY29sb3I9InJlZCIpDQoNCmBgYA0KIA0KDQpgYGB7cn0NCmRmX3I9ZGYyIHw+DQogIG11dGF0ZSh0X2Y9Zmxvb3JfZGF0ZSh0LCIxIG1pbnMiKSkgJT4lDQogIHJlbG9jYXRlKHRfZiwuYWZ0ZXI9dCkgJT4lDQogIHNsaWNlX2hlYWQobj0xLGJ5PXRfZikNCg0KYGBgDQoNCg0KYGBge3J9DQpwMT1kZjIgJT4lIA0KICBmaWx0ZXIoZGF5KHQpPT0xOCkgJT4lDQogIGdncGxvdChhZXMoeD10LCB5PXJpZGVycykpICsNCiAgZ2VvbV9saW5lKCkgKw0KICBnZ3RpdGxlKCJSaWRlcnMgb24gMThTZXAiKQ0KDQpwMSsNCiAgZ2VvbV9saW5lKGRhdGE9ZGZfciAlPiUgZmlsdGVyKGRheSh0KT09MTgpLA0KICBjb2xvcj0icmVkIikNCmBgYA0KDQolIEdldCBXZWF0aGVyIERhdGENCmBgYHtyfQ0KZGZfdz13ZWF0aGVyX2hpc3RvcnkoIldhc2hpbmd0b24iLA0KICAgICAgICAgICAgICAgIAlzdGFydCA9ICIyMDIzLTA5LTAxIiwNCiAgICAgICAgICAgICAgICAJZW5kID0gIjIwMjMtMDktMzAiLA0KICAgICAgICAgICAgICAgIAlob3VybHkgPSBjKCJhcHBhcmVudF90ZW1wZXJhdHVyZSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAJIndpbmRfc3BlZWRfMTBtIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgIAkicHJlY2lwaXRhdGlvbiIpDQopDQoNCmBgYA0KIyBNZXJnaW5nIGJpa2UgYW5kIHdlYXRoZXIgZGF0YQ0KDQpgYGB7cn0NCmRmX3M9ZGYyICU+JSBzbGljZV9zYW1wbGUobj0xMDAwKQ0KZGZfaj1kZl9zICU+JSBsZWZ0X2pvaW4oZGZfdywNCiAgICAgICAgICAgICAgICAgICAgIAlieT1qb2luX2J5KGNsb3Nlc3QodD49ZGF0ZXRpbWUpKSkNCmBgYA0KDQoNCmBgYHtyfQ0KZGZfaj1kZl9zICU+JSANCiAgIGxlZnRfam9pbihkZl93LGJ5PWpvaW5fYnkoY2xvc2VzdCh0Pj1kYXRldGltZSkpKSAgJT4lDQogICByZWxvY2F0ZShkYXRldGltZSwgLmFmdGVyPXQpDQogDQpoZWFkKGRmX2opDQpgYGANCg0KIyMgSW52ZXN0aWdhdGluZyB0aGUgVGltZSBab25lIE1pc21hdGNoIEJldHdlZW4gdGhlIEJpa2VzaGFyZSBhbmQgV2VhdGhlciBEYXRhc2V0cyANCg0KYGBge3J9DQpkZl9qJHRbMTo1XQ0KZGZfaiRkYXRldGltZVsxOjVdDQoNCmBgYA0KDQpgYGB7cn0NClN5cy50aW1lem9uZSgpDQpgYGANCg0KYGBge3J9DQpkZl9zX2VzdCA8LSBmb3JjZV90eihkZl9zLCB0em9uZSA9ICJBbWVyaWNhL05ld19Zb3JrIikNCmBgYA0KDQoNCmBgYHtyfQ0KZGZfal9lc3Q9ZGZfc19lc3QgJT4lIA0KICAgbGVmdF9qb2luKGRmX3csYnk9am9pbl9ieShjbG9zZXN0KHQ+PWRhdGV0aW1lKSkpICAlPiUNCiAgIHJlbG9jYXRlKGRhdGV0aW1lLCAuYWZ0ZXI9dCkNCiANCmhlYWQoZGZfal9lc3QpDQpgYGANCg0KYGBge3J9DQpkZl9qX2VzdCR0WzE6NV0NCmRmX2pfZXN0JGRhdGV0aW1lWzE6NV0NCmBgYA0KYGBge3J9DQpkZjIkdFsxOjVdDQpmb3JjZV90eihkZjIkdFsxOjVdLCJBbWVyaWNhL05ld19Zb3JrIikNCmBgYA0KDQpgYGB7cn0NCmRmMmM9ZGYyICU+JSBtdXRhdGUodD1mb3JjZV90eih0LHR6b25lPSJBbWVyaWNhL05ld19Zb3JrIikpICNjb3JyZWN0ZWQNCiANCmRmX3MyPWRmMmMgJT4lIHNsaWNlX3NhbXBsZShuPTEwMDApDQogDQpkZl9qMj1kZl9zMiAlPiUgDQogICBsZWZ0X2pvaW4oZGZfdyxieT1qb2luX2J5KGNsb3Nlc3QodD49ZGF0ZXRpbWUpKSkgICU+JQ0KICAgcmVsb2NhdGUoZGF0ZXRpbWUsIC5hZnRlcj10KQ0KIA0KaGVhZChkZl9qMikNCg0KYGBgDQoNCmBgYHtyfQ0KZGZjPWRmMmMgJT4lIA0KICBsZWZ0X2pvaW4oZGZfdyxieT1qb2luX2J5KGNsb3Nlc3QodD49ZGF0ZXRpbWUpKSkgJT4lDQogICByZWxvY2F0ZShkYXRldGltZSwgLmFmdGVyPXQpICU+JQ0KICByZW5hbWUoYXRlbXA9aG91cmx5X2FwcGFyZW50X3RlbXBlcmF0dXJlLA0KICAgICAgICAgd2luZD1ob3VybHlfd2luZF9zcGVlZF8xMG0sDQogICAgICAgICBwcmVjPWhvdXJseV9wcmVjaXBpdGF0aW9uKQ0KYGBgDQoNCg0KIyMgQSBWaXN1YWxpemF0aW9uIG9mIHRoZSBEYXRhDQoNCkNvbG9yaW5nIHdoZW4gdGhlIHJhaW4gaXMgZ3JlYXRlciB0aGFuIDEtIGNvbG9yIGVhY2ggcG9pbnQgYmFzZWQgb24gd2hldGhlciBpdCB3YXMgcmFpbmluZyBvciBub3QgDQoNCmBgYHtyfQ0KcDI9ZGZjICU+JQ0KICBnZ3Bsb3QoYWVzKHg9dCx5PXJpZGVycyxjb2xvcj1wcmVjPjEpKSArDQogIGdlb21fcG9pbnQoKQ0KcDINCiAgDQpgYGANCg0KDQpgYGB7cn0NCnAzID0gZGZjICU+JQ0KICBmaWx0ZXIoZGF5KHQpPT0yMykgJT4lDQogIGdncGxvdChhZXModCxyaWRlcnMsY29sb3I9d2luZCkpKw0KICBnZW9tX3BvaW50KCkgKw0KICBnZ3RpdGxlKCJSaWRlcnMgdnMuIFByZWNpcGl0YXRpb24gb24gU2VwIDEwIikNCg0KcDMNCg0KYGBgDQoNCg0KYGBge3J9DQpwbG90bHk6OmdncGxvdGx5KHAzKQ0KYGBgDQoNCg0KDQoNCg0KDQoNCg==